from lxml import etree
from html.parser import HTMLParser


class StripTagsHTMLParser(HTMLParser):
    """HTML parser that keeps only the text found between tags.

    Every text run reported by the parser is recorded in order; the
    concatenated result is available through ``getData()`` or the
    read-only ``data`` property.
    """

    def __init__(self, **kwargs):
        """Create the parser; keyword arguments are forwarded to HTMLParser."""
        super().__init__(**kwargs)
        # Text runs are collected in a list and joined on demand: repeated
        # ``+=`` on a string attribute copies the whole buffer on every run.
        self._chunks = []

    @property
    def data(self):
        """Text collected so far, concatenated in document order."""
        return "".join(self._chunks)

    def handle_data(self, data):
        """Record one run of text reported by the parser."""
        self._chunks.append(data)

    def getData(self):
        """Return the collected text (kept under this name for callers)."""
        return self.data


def strip_htmltags(html):
    """Return the text content of ``html`` with all tags removed.

    Args:
        html: Markup to strip. Any falsy value (``None``, ``""``) is
            treated as "no input".

    Returns:
        The concatenated text runs of ``html``, or ``""`` for falsy input.
    """
    if not html:
        return ""
    parser = StripTagsHTMLParser()
    parser.feed(html)
    # feed() may hold back trailing text while it waits for more input
    # (for example text ending in an unterminated "&..." such as "AT&T").
    # close() forces that buffered remainder through handle_data().
    parser.close()
    return parser.getData()


def handle_htmltags(htmlstr):
    """Return the text content of an HTML string with the markup removed.

    The string is first passed to ``strip_htmltags``. When that yields no
    text, the markup is parsed with lxml instead and the string value of
    the resulting root element is returned.

    Args:
        htmlstr: HTML markup. Any falsy value (``None``, ``""``) is
            treated as "no input".

    Returns:
        The extracted text, or ``""`` when there is no input or no text.
    """
    if not htmlstr:
        # Nothing to extract; do not hand empty/None input to the lxml
        # fallback below.
        return ""
    stripped = strip_htmltags(htmlstr)
    if stripped:
        return stripped
    root = etree.HTML(text=htmlstr)
    if root is None:
        # lxml may produce no root element (e.g. markup without any
        # element); there is no text to return in that case.
        return ""
    return root.xpath("string(.)")

if __name__ == '__main__':
    # Manual smoke check: print the text extracted from a tiny fragment.
    sample_markup = "<p>hello world</p>"
    extracted_text = handle_htmltags(sample_markup)
    print(extracted_text)
